library(tidyverse)
library(ggplot2)

Select the years to analyse!

energy_load <- read.csv("../data/load_15-24.csv")
energy_load$date <- as.POSIXct(energy_load$date, tz = "UTC")

Energy Load

Summary statistics

summary(energy_load)
##       date                             load          hour_int   
##  Min.   :2015-01-01 00:00:00.00   Min.   :30909   Min.   : 0.0  
##  1st Qu.:2017-04-28 06:30:00.00   1st Qu.:48110   1st Qu.: 6.0  
##  Median :2019-08-24 12:00:00.00   Median :56021   Median :12.0  
##  Mean   :2019-08-24 11:34:26.74   Mean   :56317   Mean   :11.5  
##  3rd Qu.:2021-12-19 16:30:00.00   3rd Qu.:64622   3rd Qu.:17.5  
##  Max.   :2024-04-15 23:00:00.00   Max.   :81078   Max.   :23.0  
##   weekday_int      month_int      working_day    
##  Min.   :1.000   Min.   : 1.000   Mode :logical  
##  1st Qu.:2.000   1st Qu.: 3.000   FALSE:23279    
##  Median :4.000   Median : 6.000   TRUE :58152    
##  Mean   :4.001   Mean   : 6.392                  
##  3rd Qu.:6.000   3rd Qu.: 9.000                  
##  Max.   :7.000   Max.   :12.000
paste("SD for load: ", sapply(energy_load, sd, na.rm = TRUE)[3], " MWh")
## [1] "SD for load:  6.9221915040762  MWh"

The average load per hour for the whole time series

energy_load |>
    group_by(hour_int) |>
    summarise(mean_load = mean(load)) |>
    ggplot(aes(x = hour_int, y = mean_load)) +
    geom_point() +
    geom_line() +
    labs(
        x = "Hour of the day",
        y = "Mean Load",
        title = "Average Load over the years 2015-2024 per day",
    )

The average load per hour grouped by year

# Add a new column for the year


# Group by hour and year, then calculate the mean load
avg_load_by_hour_year <- energy_load |>
    mutate(year = year(date)) |>
    group_by(hour_int, year) |>
    summarise(mean_load = mean(load))
## `summarise()` has grouped output by 'hour_int'. You can override using the
## `.groups` argument.
# Plot
avg_load_by_hour_year |>
    ggplot(aes(x = hour_int, y = mean_load, colour = factor(year))) +
    geom_point() +
    geom_line() +
    labs(
        x = "Hour of the day",
        y = "Mean Load",
        title = "Average Load over the years 2015-2024 per day",
    )

Boxplot of the load per day for the weeks from 2022 - 2024

energy_load |>
    ggplot(aes(x = as.factor(weekday_int), y = load)) +
    geom_boxplot() +
    labs(x = "Weekday", y = "Load in MWh", title = "Load per weekday from 2015 - 2024")

Load over the whole time period by months and grouped by working day

month <- 2
energy_load |>
    filter(month_int == month) |>
    ggplot(aes(hour_int, load, color = working_day)) +
    geom_point() +
    labs(
        x = "Hour of the day",
        y = "Load in MWh",
        title = "Load per hour for the period 2015 - 2024",
        subtitle = paste("grouped by working day and filtered by month ", month)
    )

Overview of a time series of loads per Month for a given year and month

# --- Get the time series of loads for a given year and month ---
year <- 2024
month <- 1
# Create the figure
energy_load |>
    filter(year(date) == year & month_int == month) |>
    mutate(week_day_hour = weekday_int + hour_int / 24, week_int = week(date)) |>
    ggplot(aes(x = week_day_hour, y = load, colour = factor(week_int))) +
    geom_point(size = 0.5) +
    geom_line() +
    scale_x_continuous(breaks = 1:7, labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")) +
    labs(
        x = "Day of the week",
        y = "Load in MWh",
        colour = "Week of the month",
        title = paste("Load per hour for the year ", year, " and the month ", month)
    )

A histogramm of the data for the whole time series

ggplot(energy_load, aes(load)) +
    geom_histogram(binwidth = 500) +
    labs(
        x = "Load in MWh",
        title = "Histogramm of all data points from 2015 - 2024",
    )

A histogramm of the data grouped by working days and weekends

ggplot(energy_load, aes(load)) +
    geom_histogram(binwidth = 500) +
    labs(
        title = "Histogramm of Loads for 2015 - 2024 for Working Days",
        x = "Load in MWh"
    ) +
    facet_wrap(~working_day)

Energy Load Peaks

Getting the peaks

peaks <- energy_load |>
    group_by(as.Date(date)) |>
    slice(which.max(load))

Histogramm of the peaks

peaks |>
    ggplot(aes(x = load)) +
    geom_histogram(binwidth = 250) +
    labs(
        title = "Histogramm of load peaks from 2015 to 2024",
        x = "Load in MWh"
    )

Histogramm of the peaks grouped by working days

peaks |>
    ggplot(aes(load)) +
    geom_histogram(binwidth = 250) +
    labs(
        title = "Histogramm of Loads for 2015 - 2024 for Working Days",
        x = "Load in MWh"
    ) +
    facet_wrap(~working_day)

Scatterplot of the load peaks per hour grouped by working days

peaks |>
    ggplot(aes(x = hour_int, y = load, color = working_day)) +
    geom_point() +
    scale_x_continuous(breaks = seq(min(peaks$hour_int), max(peaks$hour_int), by = 1)) +
    labs(
        title = "Scatterplot of Load peaks from 2015 - 2024",
        x = "Hour of the day",
        y = "Load in MWh"
    )

Density of the peaks

ggplot(peaks, aes(x = load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks 2015 - 2024",
        x = "Load in MWh"
    )

Density of the peaks grouped by working days

ggplot(peaks, aes(x = load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks 2015 - 2024 grouped by working days",
        x = "Load in MWh"
    ) +
    facet_wrap(~working_day)

Bivariate Density of Load peaks

library(MASS)
library(plotly)

Density of energy load on hourly resolution

den3d <- kde2d(energy_load$load, energy_load$hour_int)
plot_ly(x = den3d$x, y = den3d$y, z = den3d$z) |> add_surface()

Density of load peaks per day for working days

peaks_working <- peaks |>
    filter(working_day == TRUE)

peaks_working |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks for the day grouped by hour for working days",
        x = "Load in MWh"
    ) +
    facet_wrap(~hour_int)

Depicting the density of hour 11

peaks_working |>
    filter(hour_int == 11) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks on Working days at hour 11",
        x = "Load in MWh"
    )

Depicting the density of hour 12

peaks_working |>
    filter(hour_int == 12) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks on Working days at hour 12",
        x = "Load in MWh"
    )

Depicting the density of hour 13

peaks_working |>
    filter(hour_int == 13) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks on Working days at hour 13",
        x = "Load in MWh"
    )

Density of load peaks per day for weekends

peaks_weekends <- peaks |>
    filter(working_day == FALSE)

peaks_weekends |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks for the day grouped by hour for weekends",
        x = "Load in MWh"
    ) +
    facet_wrap(~hour_int)
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

Depicting the density of hour 18

peaks_weekends |>
    filter(hour_int == 18) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks for the weekends at hour 18",
        x = "Load in MWh"
    )

Depicting the density of hour 19

peaks_weekends |>
    filter(hour_int == 19) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks for the weekends at hour 19",
        x = "Load in MWh"
    )

Depicting the density of hour 20

peaks_weekends |>
    filter(hour_int == 20) |>
    ggplot(aes(load)) +
    geom_density() +
    labs(
        title = "Density of the load peaks for the weekends at hour 20",
        x = "Load in MWh"
    )